EDA_Wankee

Author

Wan Kee

Published

February 17, 2024

Modified

February 28, 2024

1. Import Packages

# Load (installing if needed) every package used in this analysis.
# sf is listed explicitly because st_read() and st_transform() below come from
# it, rather than relying on another package to attach it.
# NOTE(review): `st` is an unrelated CRAN package and looks like a typo for
# `sf` - confirm and drop it.
pacman::p_load(tidyverse, readr, psych, st, sf, stars, tmap, neaSG)
# Collect the path of every CSV file found anywhere under data/.
weather_list <- list.files(
  "data",
  pattern = "\\.csv$",
  full.names = TRUE,
  recursive = TRUE
)
# Print the collected paths (wrapped in a list, so they show under [[1]]).
list(weather_list)
[[1]]
 [1] "data/aspatial/Admiralty.csv"            
 [2] "data/aspatial/Ang Mo Kio.csv"           
 [3] "data/aspatial/Boon Lay (East).csv"      
 [4] "data/aspatial/Changi.csv"               
 [5] "data/aspatial/Choa Chu Kang (South).csv"
 [6] "data/aspatial/Clementi.csv"             
 [7] "data/aspatial/East Coast Parkway.csv"   
 [8] "data/aspatial/Jurong (West).csv"        
 [9] "data/aspatial/Khatib.csv"               
[10] "data/aspatial/Marina Barrage.csv"       
[11] "data/aspatial/Newton.csv"               
[12] "data/aspatial/Pasir Panjang.csv"        
[13] "data/aspatial/Paya Lebar.csv"           
[14] "data/aspatial/Seletar.csv"              
[15] "data/aspatial/Sembawang.csv"            
[16] "data/aspatial/Tai Seng.csv"             
[17] "data/aspatial/Tengah.csv"               
[18] "data/aspatial/Tuas South.csv"           
# Read all station files into one table, then replace the spaces in every
# column name with underscores so the columns can be used as bare names.
weather <- read_csv(weather_list) %>%
  rename_with(function(col_name) gsub(" ", "_", col_name), everything())

# Quick look at the column types and the first few values.
weather %>% glimpse()
Rows: 168,836
Columns: 13
$ Station                  <chr> "Admiralty", "Admiralty", "Admiralty", "Admir…
$ Year                     <dbl> 2009, 2009, 2009, 2009, 2009, 2009, 2009, 200…
$ Month                    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ Day                      <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…
$ Daily_Rainfall_Total     <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_30_Min_Rainfall  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_60_Min_Rainfall  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Highest_120_Min_Rainfall <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Mean_Temperature         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Maximum_Temperature      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Minimum_Temperature      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Mean_Wind_Speed          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ Max_Wind_Speed           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Summary statistics for every column. The measurement columns are still
# character at this point, so the rows psych marks with `*` are computed on
# category codes rather than on the measured values.
psych::describe(weather)
                          vars      n    mean     sd median trimmed   mad  min
Station*                     1 168836   10.51   5.02     12   10.69  5.93    1
Year                         2 168836 2006.25  12.34   2010 2007.10 13.34 1980
Month                        3 168836    6.54   3.45      7    6.55  4.45    1
Day                          4 168836   15.73   8.80     16   15.72 11.86    1
Daily_Rainfall_Total*        5 167188  210.70 322.45      4  145.27  2.97    1
Highest_30_Min_Rainfall*     6  51716   70.59 123.67      3   40.26  1.48    1
Highest_60_Min_Rainfall*     7  51683   80.76 140.96      3   46.24  1.48    1
Highest_120_Min_Rainfall*    8  51686   91.24 153.54      3   54.96  1.48    1
Mean_Temperature*            9 109321   56.07  12.31     56   56.29 13.34    1
Maximum_Temperature*        10 128383   89.37  17.25     92   90.62 14.83    1
Minimum_Temperature*        11 128336   54.13  14.39     53   53.54 14.83    1
Mean_Wind_Speed*            12 128355  186.14  87.96    229  196.52 34.10    1
Max_Wind_Speed*             13  93405  208.43  75.11    202  206.25 75.61    1
                           max range  skew kurtosis   se
Station*                    18    17 -0.24    -1.27 0.01
Year                      2023    43 -0.50    -0.99 0.03
Month                       12    11 -0.01    -1.21 0.01
Day                         31    30  0.01    -1.19 0.02
Daily_Rainfall_Total*     1283  1282  1.37     0.72 0.79
Highest_30_Min_Rainfall*   435   434  1.77     1.81 0.54
Highest_60_Min_Rainfall*   555   554  1.88     2.49 0.62
Highest_120_Min_Rainfall*  622   621  1.78     2.15 0.68
Mean_Temperature*           96    95 -0.22     0.03 0.04
Maximum_Temperature*       143   142 -0.83     1.29 0.05
Minimum_Temperature*       105   104  0.34    -0.18 0.04
Mean_Wind_Speed*           271   270 -0.98    -0.80 0.25
Max_Wind_Speed*            524   523  0.31     0.09 0.25
# Cache the combined table on disk, then load it back from that cache.
write_rds(x = weather, file = "data/weather.rds")
weather <- readRDS(file = "data/weather.rds")
# Read the MPSZ-2019 layer and reproject it to EPSG:3414.
mpsz <- st_transform(
  st_read(dsn = "data/geospatial", layer = "MPSZ-2019"),
  crs = 3414
)
Reading layer `MPSZ-2019' from data source 
  `/Users/chockwankee/Documents/chockwk/Group11_VAP/EDA/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS:  WGS 84
# Inspect the attribute columns and geometry type of the boundary layer.
mpsz %>% glimpse()
Rows: 332
Columns: 7
$ SUBZONE_N  <chr> "MARINA EAST", "INSTITUTION HILL", "ROBERTSON QUAY", "JURON…
$ SUBZONE_C  <chr> "MESZ01", "RVSZ05", "SRSZ01", "WISZ01", "MUSZ02", "MPSZ05",…
$ PLN_AREA_N <chr> "MARINA EAST", "RIVER VALLEY", "SINGAPORE RIVER", "WESTERN …
$ PLN_AREA_C <chr> "ME", "RV", "SR", "WI", "MU", "MP", "WI", "WI", "SI", "SI",…
$ REGION_N   <chr> "CENTRAL REGION", "CENTRAL REGION", "CENTRAL REGION", "WEST…
$ REGION_C   <chr> "CR", "CR", "CR", "WR", "CR", "CR", "WR", "WR", "CR", "CR",…
$ geometry   <MULTIPOLYGON [m]> MULTIPOLYGON (((33222.98 29..., MULTIPOLYGON (…
# Interactive map of the boundary layer, coloured by region.
tmap_mode("view")

# tmap_options() changes global settings and is not a map layer, so it is
# called on its own before the map is built instead of being added with `+`.
tmap_options(check.and.fix = TRUE)

tm_shape(mpsz) +
  tm_polygons(col = "REGION_N", palette = "Set2") +
  # The title names the variable actually mapped here (REGION_N).
  tm_layout(main.title = "Region",
            main.title.position = "left",
            main.title.size = 1,
            legend.show = FALSE,
            frame = FALSE) +
  tm_view(set.zoom.limits = c(11, 12))
# Interactive map of the boundary layer, coloured by planning-area code.
tmap_mode("view")

tm_shape(mpsz) +
  tm_polygons(col = "PLN_AREA_C", palette = "Set2") +
  tm_layout(
    frame = FALSE,
    legend.show = FALSE,
    main.title = "Planning Area",
    main.title.size = 1,
    main.title.position = "left"
  ) +
  tm_view(set.zoom.limits = c(11, 12))
# Interactive map of the boundary layer, coloured by subzone name.
tmap_mode("view")

tm_shape(mpsz) +
  tm_polygons(col = "SUBZONE_N", palette = "Set2") +
  # The title names the variable actually mapped here (SUBZONE_N).
  tm_layout(main.title = "Subzone",
            main.title.position = "left",
            main.title.size = 1,
            legend.show = FALSE,
            frame = FALSE) +
  tm_view(set.zoom.limits = c(11, 12))
# List the distinct planning-area names present in the boundary layer.
unique(mpsz[["PLN_AREA_N"]])
 [1] "MARINA EAST"             "RIVER VALLEY"           
 [3] "SINGAPORE RIVER"         "WESTERN ISLANDS"        
 [5] "MUSEUM"                  "MARINE PARADE"          
 [7] "SOUTHERN ISLANDS"        "BUKIT MERAH"            
 [9] "DOWNTOWN CORE"           "STRAITS VIEW"           
[11] "QUEENSTOWN"              "OUTRAM"                 
[13] "MARINA SOUTH"            "ROCHOR"                 
[15] "KALLANG"                 "TANGLIN"                
[17] "NEWTON"                  "CLEMENTI"               
[19] "BEDOK"                   "PIONEER"                
[21] "JURONG EAST"             "ORCHARD"                
[23] "GEYLANG"                 "BOON LAY"               
[25] "BUKIT TIMAH"             "NOVENA"                 
[27] "TOA PAYOH"               "TUAS"                   
[29] "JURONG WEST"             "SERANGOON"              
[31] "BISHAN"                  "TAMPINES"               
[33] "BUKIT BATOK"             "HOUGANG"                
[35] "CHANGI BAY"              "PAYA LEBAR"             
[37] "ANG MO KIO"              "PASIR RIS"              
[39] "BUKIT PANJANG"           "TENGAH"                 
[41] "SELETAR"                 "SUNGEI KADUT"           
[43] "YISHUN"                  "MANDAI"                 
[45] "PUNGGOL"                 "CHOA CHU KANG"          
[47] "SENGKANG"                "CHANGI"                 
[49] "CENTRAL WATER CATCHMENT" "SEMBAWANG"              
[51] "WESTERN WATER CATCHMENT" "WOODLANDS"              
[53] "NORTH-EASTERN ISLANDS"   "SIMPANG"                
[55] "LIM CHU KANG"           
# List the distinct weather-station names present in the weather table.
unique(weather[["Station"]])
 [1] "Admiralty"             "Ang Mo Kio"            "Boon Lay (East)"      
 [4] "Changi"                "Choa Chu Kang (South)" "Clementi"             
 [7] "East Coast Parkway"    "Jurong (West)"         "Khatib"               
[10] "Marina Barrage"        "Newton"                "Pasir Panjang"        
[13] "Paya Lebar"            "Seletar"               "Sembawang"            
[16] "Tai Seng"              "Tengah"                "Tuas South"           
# Lookup from weather-station name to the planning area (mpsz$PLN_AREA_N) that
# the station is mapped onto. Despite the "subzone" naming, the values must be
# planning-area names because that is the column the weather data is joined on.
station_to_subzone <- c(
  "Admiralty" = "WOODLANDS",
  "Ang Mo Kio" = "ANG MO KIO",
  "Boon Lay (East)" = "BOON LAY",
  "Changi" = "CHANGI",
  "Choa Chu Kang (South)" = "CHOA CHU KANG",
  "Clementi" = "CLEMENTI",
  "East Coast Parkway" = "BEDOK",
  "Jurong (West)" = "JURONG WEST",
  "Khatib" = "YISHUN",
  "Marina Barrage" = "MARINA SOUTH",
  "Newton" = "NEWTON",
  # "PASIR PANJANG" is not a planning area in mpsz, so it would never match in
  # a join on PLN_AREA_N; the Pasir Panjang subzones belong to QUEENSTOWN.
  "Pasir Panjang" = "QUEENSTOWN",
  "Paya Lebar" = "PAYA LEBAR",
  "Seletar" = "SELETAR",
  "Sembawang" = "SEMBAWANG",
  "Tai Seng" = "HOUGANG",
  "Tengah" = "TENGAH",
  "Tuas South" = "TUAS"
)

# Fail loudly rather than silently losing a station in the later join.
stopifnot(
  "every station needs an entry in station_to_subzone" =
    all(weather$Station %in% names(station_to_subzone)),
  "every mapped value must be a planning area in mpsz" =
    all(station_to_subzone %in% mpsz$PLN_AREA_N)
)

# Add the planning area as the first column; unname() keeps the station names
# from being carried along as vector names.
weather <- weather %>%
  mutate(Subzone = unname(station_to_subzone[Station]), .before = 1)
head(weather)
# A tibble: 6 × 14
  Subzone  Station  Year Month   Day Daily_Rainfall_Total Highest_30_Min_Rainf…¹
  <chr>    <chr>   <dbl> <dbl> <dbl> <chr>                <chr>                 
1 WOODLAN… Admira…  2009     1     1 <NA>                 <NA>                  
2 WOODLAN… Admira…  2009     1     2 <NA>                 <NA>                  
3 WOODLAN… Admira…  2009     1     3 <NA>                 <NA>                  
4 WOODLAN… Admira…  2009     1     4 <NA>                 <NA>                  
5 WOODLAN… Admira…  2009     1     5 <NA>                 <NA>                  
6 WOODLAN… Admira…  2009     1     6 <NA>                 <NA>                  
# ℹ abbreviated name: ¹​Highest_30_Min_Rainfall
# ℹ 7 more variables: Highest_60_Min_Rainfall <chr>,
#   Highest_120_Min_Rainfall <chr>, Mean_Temperature <chr>,
#   Maximum_Temperature <chr>, Minimum_Temperature <chr>,
#   Mean_Wind_Speed <chr>, Max_Wind_Speed <chr>
# All nine measurement columns were read as text, so convert every one of them
# (leaving Minimum_Temperature as character makes the summary below fail).
# Values that are not numbers become NA, with a coercion warning.
measurement_cols <- c(
  "Daily_Rainfall_Total",
  "Highest_30_Min_Rainfall",
  "Highest_60_Min_Rainfall",
  "Highest_120_Min_Rainfall",
  "Mean_Temperature",
  "Maximum_Temperature",
  "Minimum_Temperature",
  "Mean_Wind_Speed",
  "Max_Wind_Speed"
)

weather <- weather %>%
  mutate(across(all_of(measurement_cols), as.numeric))

# Apply `fn` to the non-missing values of `x`. Returns NA when there are none,
# so a year without readings is not reported as 0 (sum) or -Inf/Inf (max/min).
summarise_observed <- function(x, fn) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0) NA_real_ else fn(observed)
}

# One row per planning area, station and year. Rainfall totals are summed;
# the "highest" and "maximum" columns take the yearly maximum, the minimum
# temperature the yearly minimum, and the mean temperature the yearly mean.
weather_map <- weather %>%
  group_by(Subzone, Station, Year) %>%
  summarise(
    Annual_Rainfall_Total =
      summarise_observed(Daily_Rainfall_Total, sum),
    Annual_Highest_30_Min_Rainfall =
      summarise_observed(Highest_30_Min_Rainfall, max),
    Annual_Highest_60_Min_Rainfall =
      summarise_observed(Highest_60_Min_Rainfall, max),
    Annual_Highest_120_Min_Rainfall =
      summarise_observed(Highest_120_Min_Rainfall, max),
    Annual_Mean_Temperature =
      summarise_observed(Mean_Temperature, mean),
    Annual_Maximum_Temperature =
      summarise_observed(Maximum_Temperature, max),
    Annual_Minimum_Temperature =
      summarise_observed(Minimum_Temperature, min),
    .groups = "drop"
  )
Error in `summarise()`:
ℹ In argument: `Annual_Minimum_Temperature = sum(Minimum_Temperature,
  na.rm = TRUE)`.
ℹ In group 1: `Subzone = "ANG MO KIO"`, `Station = "Ang Mo Kio"`, `Year =
  2009`.
Caused by error in `sum()`:
! invalid 'type' (character) of argument
# Inspect the yearly per-station summary table.
weather_map %>% glimpse()
Error in eval(expr, envir, enclos): object 'weather_map' not found
# weather_map holds one row per station per year. Joining all of it would
# repeat every polygon once per year, and the maps would draw those copies on
# top of each other, so only a single year is attached to the boundaries.
map_year <- max(weather_map$Year)

mpszweather <- left_join(
  mpsz,
  filter(weather_map, Year == map_year),
  by = c("PLN_AREA_N" = "Subzone")
)
Error in eval(expr, envir, enclos): object 'weather_map' not found
# Inspect the boundary layer after the weather columns have been attached.
mpszweather %>% glimpse()
Error in eval(expr, envir, enclos): object 'mpszweather' not found
# Choropleth of the Annual_Rainfall_Total column, using jenks class breaks.
tm_shape(shp = mpszweather) +
  tm_polygons(
    col = "Annual_Rainfall_Total",
    style = "jenks",
    palette = "Blues"
  )
Error in eval(expr, envir, enclos): object 'mpszweather' not found
# Choropleth of the Annual_Highest_30_Min_Rainfall column, using jenks class
# breaks.
tm_shape(shp = mpszweather) +
  tm_polygons(
    col = "Annual_Highest_30_Min_Rainfall",
    style = "jenks",
    palette = "Blues"
  )
Error in eval(expr, envir, enclos): object 'mpszweather' not found
# Choropleth of the Annual_Mean_Temperature column, using jenks class breaks.
tm_shape(shp = mpszweather) +
  tm_polygons(
    col = "Annual_Mean_Temperature",
    style = "jenks",
    palette = "Oranges"
  )
Error in eval(expr, envir, enclos): object 'mpszweather' not found
# Choropleth of the Annual_Maximum_Temperature column, using jenks class
# breaks.
tm_shape(shp = mpszweather) +
  tm_polygons(
    col = "Annual_Maximum_Temperature",
    style = "jenks",
    palette = "Oranges"
  )
Error in eval(expr, envir, enclos): object 'mpszweather' not found
# Choropleth of the Annual_Minimum_Temperature column, using jenks class
# breaks.
tm_shape(shp = mpszweather) +
  tm_polygons(
    col = "Annual_Minimum_Temperature",
    style = "jenks",
    palette = "Oranges"
  )
Error in eval(expr, envir, enclos): object 'mpszweather' not found